import pandas as pd
import numpy as np
import warnings
import datetime as dt

# Tutorial script: load the sample workbook, keep rows above a salary
# threshold, demonstrate salary binning with pd.cut, de-duplicate rows by
# creation time, and render the creation-time column as date strings.
df = pd.read_excel('/Users/user/PycharmProjects/text1/pandas操作excel常见的二十种方式/示例数据.xlsx', header=0)

print(df.head(10))

# Keep only rows whose salary exceeds 2000. The explicit .copy() detaches the
# filtered frame from the one returned by read_excel, so the column assignment
# further down does not raise SettingWithCopyWarning.
df = df[df['薪资水平'] > 2000].copy()
print(df.head(10))

# Renumber the surviving rows 0..n-1.
df = df.reset_index(drop=True)
print(len(df))

# Bin salaries into two labelled groups: (0, 15000] and (15000, +inf].
# An open-ended upper edge is used instead of the column maximum: with the
# maximum, pd.cut fails whenever the largest salary is <= 15000 (bin edges
# must increase monotonically) or when the filter above leaves no rows.
group_name = ['低', '高']
bins = [0, 15000, np.inf]
df['new_clos'] = pd.cut(df['薪资水平'], bins, labels=group_name)

# The binned column is only a demonstration; remove it again.
df = df.drop(columns=['new_clos'])
print(df.head())

# Sort newest first, keep the first row for each creation time, then renumber
# so the index is contiguous again after sorting and dropping rows.
df = (
    df.sort_values(by='创建时间', ascending=False)
    .drop_duplicates(subset='创建时间', keep='first')
    .reset_index(drop=True)
)

# Per-column count of missing values (previously computed but never shown).
print(df.isnull().sum())

# Format the creation time as a four-digit-year date string. The vectorised
# .dt accessor leaves missing timestamps as missing values instead of raising,
# which a per-row x.strftime(...) call does on NaT.
df['创建时间'] = pd.to_datetime(df['创建时间']).dt.strftime('%Y-%m-%d')
# bins = [0, 10000, max(df['薪资水平'])]
# group_names = ['低', '高']
# df['new_cols'] = pd.cut(df['薪资水平'], bins, labels=group_names)
# df.drop(columns=['new_cols'], axis=1, inplace=True)
# print(df.shape[0])
# df = df.sort_values(by='创建时间', ascending=False).drop_duplicates('创建时间')
# print(df.shape[0])
# df.isnull().sum()
# df.index = range(df.shape[0])
# print(df)
# df['创建时间'] = df['创建时间'].apply(lambda x: x.strftime('%Y-%m-%d'))
# print(df)

# col = df.columns[[0, 2, 1, 4, 5, 6]]
# df = df[col]
# print(df)
# df['hebiglie'] = df['技能要求'] + df['工作经验']
# print(df)
# df['技能要求'].apply(lambda x: x.split(',', expand=True))
# df.groupby('学历').mean()

# print(len(df[['薪资水平'] > 10000]))
